package Job

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
 * Spark batch job: among job postings whose experience requirement (column 8)
 * is "不限" (no limit) and whose company type (column 16) is "上市公司"
 * (listed company), counts postings per column-17 value and prints the top 5.
 */
object ListedComUnLimit {
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = new SparkContext("local[*]", "ListedComUnLimit")
    val inputPath: String = "file/JobData/input"
    // NOTE(review): outputPath is declared but never used — the results are only
    // printed to stdout below. Keep or wire up to saveAsTextFile as intended.
    val outputPath: String = "file/JobData/output/ListedComUnLimit"

    try {
      val res = sc.textFile(inputPath)
        // Split each line exactly once and reuse the fields; the original split
        // the same line three times (twice in the filter, once in the map).
        .map(_.split(","))
        // Guard against malformed/short rows: String.split drops trailing empty
        // strings, so a row may have fewer than 18 columns — indexing it blindly
        // would throw ArrayIndexOutOfBoundsException and kill the job.
        .filter(fields => fields.length > 17 && fields(8) == "不限" && fields(16) == "上市公司")
        .repartition(2)
        .map(fields => (fields(17), 1))
        .reduceByKey(_ + _)
        // Descending by count; named boolean parameter for readability.
        .sortBy(_._2, ascending = false)
        .take(5)

      res.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job fails partway.
      sc.stop()
    }
  }

}
